# Load the saved Camp Fire tweet collection.
cf <- readRDS(file = "data/campfire-tweets-2020-04-17.Rds")

# Keep tweets posted by the agency accounts listed in the pattern.
# str_detect() treats the pattern as an unanchored, case-sensitive regex, so a
# screen name only has to *contain* one of the alternatives to be kept.
Sources <- filter(
  cf,
  str_detect(
    screen_name,
    "CALFIRE_ButteCo|Cal_Fire|ButteSheriff|ChicoPolice|ChicoFD|CountyOfButte|Paradise_CA"
  )
)

# The 29 rows with the smallest created_at_pst (negative n selects from the
# bottom of the ordering).
no_outliers <- Sources %>%
  top_n(-29, created_at_pst)
# Earliest tweet time for each account present in the subset.
summarise(
  group_by(no_outliers, screen_name),
  min(created_at_pst)
)
## # A tibble: 3 x 2
## screen_name `min(created_at_pst)`
## <chr> <dttm>
## 1 ButteSheriff 2018-11-08 08:03:55
## 2 CALFIRE_ButteCo 2018-11-08 06:51:47
## 3 ChicoFD 2018-11-08 07:46:17
# Treat the account name as a categorical variable for plotting.
no_outliers[["screen_name"]] <- as.factor(no_outliers[["screen_name"]])

# Stacked bars of tweet counts by hour, filled by account. Counts are computed
# per (hour, account, minute) and stacked within each hour by geom_col().
no_outliers %>%
  group_by(tweet_hour, screen_name, tweet_min) %>%
  summarise(tweet_count = n()) %>%
  ggplot(mapping = aes(x = tweet_hour, y = tweet_count, fill = screen_name)) +
  geom_col()
# First and last tweet time across all rows of Sources (not only the
# 29-row no_outliers subset).
range(Sources[["created_at_pst"]])
## [1] "2018-11-08 06:51:47 PST" "2018-12-19 13:46:14 PST"
# One histogram per engagement metric, filled by account. Each panel keeps
# only tweets whose metric is greater than 1.
plot.fav <- ggplot(
  filter(no_outliers, favorite_count > 1),
  aes(x = favorite_count, fill = screen_name)
) +
  geom_histogram()

plot.rt <- ggplot(
  filter(no_outliers, retweet_count > 1),
  aes(x = retweet_count, fill = screen_name)
) +
  geom_histogram()

plot.quo <- ggplot(
  filter(no_outliers, quote_count > 1),
  aes(x = quote_count, fill = screen_name)
) +
  geom_histogram()

plot.rply <- ggplot(
  filter(no_outliers, reply_count > 1),
  aes(x = reply_count, fill = screen_name)
) +
  geom_histogram()

# Arrange the four panels in a 2 x 2 grid.
gridExtra::grid.arrange(plot.fav, plot.rt, plot.quo, plot.rply, nrow = 2)
# ---- Timeline data preparation ----

# Account order and the matching legend colours.
name_levels <- c("CALFIRE_ButteCo", "ButteSheriff", "ChicoFD")
status_colors <- c("#0070C0", "#00B050", "#FFC000")
no_outliers$screen_name <- factor(
  no_outliers$screen_name,
  levels = name_levels,
  ordered = TRUE
)

# Heights (above or below the baseline at y = 0) at which labels are drawn;
# recycled over the distinct tweet times.
positions <- c(-2, -0.5, -1.0, 2, -1.5, 1.5, 1, .5, 2.5, -2.5, 3, -3, 3.5, -3.5, 4, -4)
# Alternating sign vector. It is not used for the label offset below because
# `positions` does not alternate in sign; left defined in case code outside
# this section reads it.
directions <- c(1, -1)

# One position per distinct timestamp, so tweets sharing a timestamp share a
# stem.
tweet_times <- unique(no_outliers$created_at_pst)
line_pos <- data.frame(
  "created_at_pst" = tweet_times,
  "position" = rep(positions, length.out = length(tweet_times))
)
# The direction must follow the side of the baseline the label is on, so that
# the text offset pushes the label away from the baseline rather than back
# along its stem.
line_pos$direction <- sign(line_pos$position)

no_outliers <- merge(x = no_outliers, y = line_pos, by = "created_at_pst", all = TRUE)
no_outliers <- no_outliers[with(no_outliers, order(created_at_pst, screen_name)), ]

# Hourly tick labels spanning the data plus a buffer on each side.
hour_buffer <- 2
date_range <- seq(
  min(no_outliers$created_at_pst) - hours(hour_buffer),
  max(no_outliers$created_at_pst) + hours(hour_buffer),
  by = "hour"
)
date_format <- format(date_range, "%H:%M")
date_df <- data.frame(date_range, date_format)

# Place each label slightly beyond the end of its stem.
text_offset <- 0.05
no_outliers$text_position <- (text_offset * no_outliers$direction) +
  no_outliers$position

# Labels show only the first 20 characters of the tweet text.
no_outliers$text_output <- substr(no_outliers$text, 1, 20)
# Timeline of the tweets: a horizontal baseline at y = 0, one point per tweet
# coloured by account, and a stem leading to a truncated text label.
timeline_plot <- ggplot(
  no_outliers,
  aes(x = created_at_pst, y = 0, col = screen_name, label = text_output)
) +
  labs(col = "Tweets") +
  scale_color_manual(
    values = status_colors,
    labels = name_levels,
    drop = FALSE
  ) +
  theme_classic() +
  # Baseline of the timeline
  geom_hline(yintercept = 0, color = "black", size = 0.3) +
  # Stem from each label position down (or up) to the baseline
  geom_segment(
    data = no_outliers,
    aes(y = position, yend = 0, xend = created_at_pst),
    color = "black",
    size = 0.2
  ) +
  # Tweet markers on the baseline
  geom_point(aes(y = 0), size = 3) +
  # Hide both axes entirely and move the legend below the plot
  theme(
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    legend.position = "bottom"
  ) +
  # Hour labels just under the baseline, replacing the hidden x axis
  geom_text(
    data = date_df,
    aes(x = date_range, y = -0.1, label = date_format),
    size = 2.5,
    vjust = 0.5,
    color = "black",
    angle = 45
  ) +
  # Truncated tweet text at the end of each stem
  geom_text(aes(y = text_position, label = text_output), size = 2.5)

print(timeline_plot)
Second Plot Type
library(vistime)
# vistime timeline of the same tweets: one event per tweet text, grouped by
# account and placed at its creation time, with labels hidden.
vistime(
  no_outliers,
  events = "text",
  groups = "screen_name",
  start = "created_at_pst",
  optimize_y = FALSE,
  show_labels = FALSE
)
Third Plot Type: timelineS
library(timelineS)
#timelineS(no_outliers, main = "Life of Michael Jackson")
# ---- Label tweets as coming from news outlets or from the public ----

# Verified accounts whose screen name or profile description contains
# "news"/"News": one row per account, most-followed first.
# `verified == "TRUE"` is kept as a string comparison so it behaves the same
# whether the column is logical or character.
news_orgs <- cf %>%
  users_data() %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  filter(str_detect(screen_name, "news|News") | str_detect(description, "news|News")) %>%
  filter(verified == "TRUE") %>%
  arrange(desc(followers_count))

# Both halves are defined against the same account list, so every tweet lands
# in exactly one group. Selecting `news` row by row while selecting `public`
# by account could duplicate or drop tweets whenever an account's description
# or verified flag differs between rows.
news <- semi_join(x = cf, y = news_orgs, by = "screen_name")
news$user_type <- "news"

public <- anti_join(x = cf, y = news_orgs, by = "screen_name")
public$user_type <- "public"

# Recombine; `cf` now carries a `user_type` column (public rows first).
cf <- rbind(public, news)
# The 20 news accounts with the most tweets in the collection.
top.20.users <- news %>%
  group_by(screen_name) %>%
  summarize(n = n()) %>%
  arrange(-n) %>%
  head(20)

# Horizontal bar chart of tweet counts for those accounts.
ggplot(top.20.users, aes(x = reorder(screen_name, -n), y = n)) +
  geom_col(fill = "darkslategray") +
  theme_minimal() +
  coord_flip() +
  labs(x = "Users", y = "Count")
I’m interested in how sentiment differs between users who are considered news outlets and the general public. We plan to look more closely at individual public officials, such as the Sheriff, when we go through this more thoroughly in our project.
# Attach AFINN scores to the tokenised tweets. No `by` is given, so the join
# uses whatever column names the two tables share; tokens without a score are
# dropped by the inner join.
ts1 <- inner_join(tweet_words_nostop, get_sentiments("afinn"))

# Net sentiment per tweet: the sum of its tokens' scores.
ts2 <- ts1 %>%
  group_by(status_id) %>%
  summarise(sentiment = sum(value))

# Add the per-tweet score to the agency tweets; tweets with no scored tokens
# get NA.
cf2 <- left_join(Sources, ts2, by = "status_id")

# Density of tweet sentiment, one curve per account.
ggplot(cf2, aes(x = sentiment, col = screen_name)) +
  geom_density(lwd = 2) +
  theme_minimal()